## Preview the first five rows of the ASA sections data.
head(asasec, n = 5)
##                                Section         Sname Beginning Revenues
## 1      Aging and the Life Course (018)         Aging     12752    12104
## 2     Alcohol, Drugs and Tobacco (030) Alcohol/Drugs     11933     1144
## 3 Altruism and Social Solidarity (047)      Altruism      1139     1862
## 4            Animals and Society (042)       Animals       473      820
## 5             Asia/Asian America (024)          Asia      9056     2116
##   Expenses Ending Journal Year Members
## 1    12007  12849      No 2005     598
## 2      400  12677      No 2005     301
## 3     1875   1126      No 2005      NA
## 4     1116    177      No 2005     209
## 5     1710   9462      No 2005     365

Figure 8.1: Back to basics

## Base plot for the 2014 rows of asasec: Members on x, Revenues on y.
## Sname is mapped to `label` at the plot level.
p <- ggplot(
  subset(asasec, Year == 2014),
  aes(x = Members, y = Revenues, label = Sname)
)

## Points plus geom_smooth() with its default settings.
p +
  geom_point() +
  geom_smooth()

Figure 8.2: Refining the plot

## Refine the plot: color the points by Journal (a third variable),
## fit with method = "lm" instead of the default smoother, and put the
## legend on top.
p <- ggplot(
  subset(asasec, Year == 2014),
  aes(x = Members, y = Revenues, label = Sname)
)

p +
  geom_point(aes(color = Journal)) +
  geom_smooth(method = "lm") +
  theme(legend.position = "top")

Figure 8.3: Refining the axes

## Build the figure one layer at a time, keeping each intermediate object.
p0 <- ggplot(
  subset(asasec, Year == 2014),
  aes(x = Members, y = Revenues, label = Sname)
)

## Linear fit without the standard-error band, added before the points so
## that the points are drawn on top of it.
p1 <- p0 +
  geom_smooth(method = "lm", se = FALSE, color = "gray80") +
  geom_point(aes(color = Journal))

## Text labels only for the 2014 rows with Revenues above 7000.
p2 <- p1 +
  geom_text_repel(
    data = subset(asasec, Year == 2014 & Revenues > 7000),
    size = 2
  )

## Axis, legend, title, subtitle and caption text.
p3 <- p2 +
  labs(
    title = "ASA Sections",
    subtitle = "2014 Calendar Year.",
    caption = "Source: ASA annual report.",
    x = "Membership",
    y = "Revenues",
    color = "Section has own Journal"
  )

## Dollar-formatted y-axis labels, theme_bw(), legend at the bottom.
p4 <- p3 +
  scale_y_continuous(labels = scales::dollar) +
  theme_bw() +
  theme(legend.position = "bottom")

p4

8.1 Use color to your advantage

Figure 8.7.1: Color palette Set2

## organdata with rows that have a missing `world` value dropped;
## roads on x, donors on y, colored by world.
p <- ggplot(
  drop_na(organdata, world),
  aes(x = roads, y = donors, color = world)
)

## Brewer "Set2" palette.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Set2") +
  theme(legend.position = "top")

Figure 8.7.2: Color palette Pastel2

## organdata with rows that have a missing `world` value dropped;
## roads on x, donors on y, colored by world.
p <- ggplot(
  drop_na(organdata, world),
  aes(x = roads, y = donors, color = world)
)

## Brewer "Pastel2" palette.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Pastel2") +
  theme(legend.position = "top")

Figure 8.7.3: Color palette Dark2

## organdata with rows that have a missing `world` value dropped;
## roads on x, donors on y, colored by world.
p <- ggplot(
  drop_na(organdata, world),
  aes(x = roads, y = donors, color = world)
)

## Brewer "Dark2" palette.
p +
  geom_point(size = 2) +
  scale_color_brewer(palette = "Dark2") +
  theme(legend.position = "top")

Figure 8.8: Color blind friendly palette

## Eight hex colors used as a manual color scale.
cb_palette <- c(
  "#999999", "#E69F00", "#56B4E9", "#009E73",
  "#F0E442", "#0072B2", "#D55E00", "#CC79A7"
)

## Apply the palette to the color scale of p4 as currently defined.
p4 +
  scale_color_manual(values = cb_palette)

Layer color and text together

Figure 8.10: The background layer

## Two hex colors, later passed to scale_color_manual() for the layer of
## flipped counties.
party_colors <- c("#2E74C0", "#CB454A")

## Background layer: counties with flipped == "No";
## pop on x, black / 100 on y.
p0 <- ggplot(
  data = subset(county_data, flipped == "No"),
  mapping = aes(x = pop, y = black / 100)
)

## Faint gray points on a log10 x-axis with comma-formatted labels.
## The argument is spelled `labels` in full: `label` only worked through
## R's partial argument matching.
p1 <- p0 +
  geom_point(alpha = 0.15, color = "gray50") +
  scale_x_log10(labels = scales::comma)

p1

Figure 8.11: Using a second layer that is a complement of the first

## Second layer: counties with flipped == "Yes", colored by partywinner16
## using the party_colors palette.
p2 <- p1 +
  geom_point(
    aes(color = partywinner16),
    data = subset(county_data, flipped == "Yes")
  ) +
  scale_color_manual(values = party_colors)

p2

Figure 8.12: Adding guides and labels, and fixing the y scale

## Percent-formatted y-axis labels, plus legend, axis, title and caption text.
p3 <- p2 +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Flipped counties, 2016",
    caption = "Counties in gray did not flip.",
    x = "County Population (log scale)",
    y = "Percent Black Population",
    color = "County flipped to ..."
  )

## Display with theme_bw(); p3 itself is stored without it.
p3 +
  theme_bw()

Figure 8.13: County-level election data from 2016

## State labels for flipped counties where `black` is above 25.
p4 <- p3 +
  theme_bw() +
  geom_text_repel(
    data = subset(county_data, flipped == "Yes" & black > 25),
    mapping = aes(x = pop, y = black / 100, label = state),
    size = 2
  )

## Display with theme_minimal() and the legend on top.
p4 +
  theme_minimal() +
  theme(legend.position = "top")

8.3 Change the appearance of plots with themes

Figure 8.14: Economist and WSJ theme

library(ggthemes)

## Economist theme: set it as the session default, then draw p4 with it.
theme_set(theme_economist())
p4 +
  theme_economist() +
  theme(legend.position = "top")

## WSJ theme: set it as the session default, then draw p4 with it,
## shrinking the title, legend title and caption relative to the theme's
## sizes.
theme_set(theme_wsj())
p4 +
  theme_wsj() +
  theme(
    legend.position = "top",
    plot.title = element_text(size = rel(0.6)),
    legend.title = element_text(size = rel(0.35)),
    plot.caption = element_text(size = rel(0.35))
  )

Use theme elements in a substantive way

Figure 8.16: A customized small multiple

Figure 8.17: Ridgeplot version of the age distribution plot

library(ggridges)

## Make theme_minimal() the session default theme.
theme_set(theme_minimal())

## age on x; year on y as an ordered factor whose levels are the unique
## years in reverse order of first appearance.
p <- ggplot(
  gss_lon,
  aes(
    x = age,
    y = factor(year, levels = rev(unique(year)), ordered = TRUE)
  )
)

## Semi-transparent light-blue ridgelines, one per year.
p +
  geom_density_ridges(alpha = 0.6, fill = "lightblue", scale = 1.5) +
  scale_x_continuous(breaks = c(25, 50, 75)) +
  scale_y_discrete(expand = c(0, 4)) +
  labs(
    title = "Age distribution of\nGSS respondents",
    x = "Age",
    y = NULL
  ) +
  theme(title = element_text(size = 14, face = "bold"))

8.5 Case studies

Two y-axes

  • Most of the time when people draw plots with two y-axes they want to line the series up as closely as possible because they suspect that there is a substantive association between them.

  • The main problem with using two y-axes is that it makes it even easier than usual to fool yourself (or someone else) about the degree of association between the variables, because you can adjust the scaling of the axes relative to one another in a way that moves the data series around.

  • Index numbers can have complications of their own, but they allow us to use one axis instead of two, and to calculate a sensible difference between the two series and plot that.

Figure 8.20: Indexed series with a running difference below, using separate plots

## Reshape the two index columns to long form: one row per date and series,
## with the column name in `series` and its value in `score`.
## pivot_longer() replaces the superseded gather(); the resulting columns
## are the same (date, series, score).
fredts_m <- fredts %>%
  select(date, sp500_i, monbase_i) %>%
  pivot_longer(
    cols = sp500_i:monbase_i,
    names_to = "series",
    values_to = "score"
  )

## Top panel: both index series as lines, colored by series.
p <- ggplot(
  data = fredts_m,
  mapping = aes(x = date, y = score, group = series, color = series)
)

p1 <- p +
  geom_line() +
  theme(legend.position = "top") +
  labs(x = "Date", y = "Index", color = "Series")

## Bottom panel: difference between the two indexes, computed from the
## wide data.
p <- ggplot(
  data = fredts,
  mapping = aes(x = date, y = sp500_i - monbase_i)
)

p2 <- p +
  geom_line() +
  labs(x = "Date", y = "Difference")

## Stack the panels vertically (75% / 25% of the height), aligned on the
## vertical axis.
cowplot::plot_grid(
  p1, p2,
  nrow = 2,
  rel_heights = c(0.75, 0.25),
  align = "v"
)